##Replication code for main figures and S5-7 of "Understanding Political Communication and Political Communicators on Twitch"
##Sangyeon Kim
#Load necessary packages
library(ggplot2)
library(lubridate) 
library(moments)
library(stm)
library(tidyverse)
library(tidytext)

######################Figure 3 to 4 ###############################
#Load data for Figure 3 to 4
load('./data/fig3_to_4.rda')

#to ensure all the numbers appear in natural number format
options(scipen = 999)

#Figure 3-1. "created_at" histogram
#convert the character to date-time object
prof_info_data$created_at_dt <- ymd_hms(prof_info_data$created_at, tz = "UTC")

#create the plot
pdf("./figures/figure3_1.pdf", width = 10, height = 5)
ggplot(prof_info_data, aes(x =created_at_dt)) +
  geom_histogram(binwidth = 30 * 24 * 60 * 60, fill = "black", color = "white") +  # Monthly bins
  scale_x_datetime(date_labels = "%Y-%m", date_breaks = "12 months") +  # Customize x-axis labels
  labs(x = "Date (Created at)", y = "Frequency") +
  theme_minimal()+
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 10)   # Axis text font size
  )
dev.off()

#Figure 3-2. profile "viewcount" histogram

pdf("./figures/figure3_2.pdf", width = 5, height = 5)
ggplot(prof_info_data, aes(x = log10(view_count))) +
  geom_histogram(binwidth = 1, fill = "black", color = "white") +
  labs(x = "View Count", y = "Frequency") +
  scale_x_continuous(
    labels = function(x) round(10^x, 0)  # Convert log10 back to raw values and round to the nearest whole number
  ) +
  theme_minimal()+
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

#skewness stat
fig_3_2_skew <- agostino.test(prof_info_data$view_count)
print(fig_3_2_skew)

#Figure 4-1. "Gender" barplots
pdf("./figures/figure4_1.pdf", width = 5, height = 5)
ggplot(prof_info_data[!is.na(prof_info_data$gender), ], aes(x = gender)) +
  geom_bar(fill = "black", color = "white") +
  labs(x = "Gender", y = "Count") +
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

# Count the number of rows without NAs in the gender column
gender_count <- sum(!is.na(prof_info_data$gender))
print(gender_count) #88

#Figure 4-2. "Ideology" barplots
pdf("./figures/figure4_2.pdf", width = 5, height = 5)
ggplot(prof_info_data[!is.na(prof_info_data$ideology), ], aes(x = ideology)) +
  geom_bar(fill = "black", color = "white") +
  labs(x = "Ideology", y = "Count") +
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

# Count the number of rows without NAs in the ideology column
ideo_count <- sum(!is.na(prof_info_data$ideology))
print(ideo_count) #119

######################Figure 5 to 8 ###############################

#Load data for Figure 5 to 8
load('./data/fig5_to_8.rda')

#Figure 5. Streaming frequency
pdf("./figures/figure5.pdf")
ggplot(plot_data,aes(x = broad_freqency)) + 
  geom_histogram(fill = "black", color = "white") + 
  #scale_x_log10()+
  labs(x="Days of streamings", y = "Count")+
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

#skewness stat
fig_5_skew <- agostino.test(plot_data$broad_freqency)
print(fig_5_skew)

#Figure 6-1. The number of chat posts

pdf("./figures/figure6_1.pdf")
ggplot(plot_data,aes(x = chat_num)) + 
  geom_histogram(fill = "black", color = "white") +  
  scale_x_log10()+
  labs(x="Number of chat posts", y = "Count")+
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

#Figure 6-2. The number of chat posts per day

pdf("./figures/figure6_2.pdf")
ggplot(plot_data,aes(x = chat_num_per_day)) + 
  geom_histogram(fill = "black", color = "white") +  
  scale_x_log10()+
  labs(x="Number of chat posts per day", y = "Count")+
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

#Figure 7. The number of unique users

pdf("./figures/figure7.pdf")
ggplot(plot_data,aes(x = num_unique_chat_users)) + 
  geom_histogram(fill = "black", color = "white") +  
  scale_x_log10()+
  labs(x="Number of unique users", y = "Count")+
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  ) 
dev.off()

#skewness stat
fig_7_skew <- agostino.test(plot_data$num_unique_chat_users)
print(fig_7_skew)

#Figure 8-1. The number of chat posts (mean)

pdf("./figures/figure8_1.pdf")
ggplot(plot_data,aes(x = mean_chat_freq_wm)) + 
  geom_histogram(fill = "black", color = "white") + 
  #scale_x_log10()+
  labs(x="Chat Frequency (Mean)", y = "Count")+
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

#Figure 8-2. The number of chat posts (max)

pdf("./figures/figure8_2.pdf")
ggplot(plot_data,aes(x = max_chat_freq_wm)) + 
  geom_histogram(fill = "black", color = "white") + 
  scale_x_log10()+
  labs(x="Chat Frequency (Max)", y = "Count")+
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

#skewness stat
fig_8_2_skew <- agostino.test(log10(plot_data$max_chat_freq_wm))
print(fig_8_2_skew)

######################Figure 9 to 13, Figure E1 to F2, Figure F1 to F6, Figure G1 to G3 ###############################

#Load data for Figure 9 to 13
load('./data/fig9_to_13.rda')

#Fit the topic model using STM package

sampled_all$date<-as.Date(sampled_all$time)
sampled_all$date<-as.numeric(sampled_all$date)-as.numeric(min(sampled_all$date)) 

set.seed(1542)
processed <- textProcessor(sampled_all$text, metadata = sampled_all)
out <- prepDocuments(processed$documents, processed$vocab,processed$meta,lower.thresh = 1)
docs <- out$documents
vocab <- out$vocab
meta <-out$meta

#this part could take a long time (35 hours in i5 laptop)
#LDA_agg_150<-stm(documents=out$documents,vocab=out$vocab,K=150, 
#                 max.em.its = 1000, data = out$meta, init.type = "LDA", seed=1542)

#save(LDA_agg_150, file="./data/topicmodel_results.rda")

#load topic model results
load("./data/topicmodel_results.rda")

#Figure F1 to F6
lin150_topics <- tidy(LDA_agg_150, matrix = "beta")
lin150_terms <- lin150_topics %>%
  group_by(topic) %>%
  slice_max(beta, n = 15) %>% 
  ungroup() %>%
  arrange(topic, -beta)

# Split lin150_terms by 10
lin150_terms_1 <- lin150_terms[lin150_terms$topic < 16 & lin150_terms$topic > 0, ]
lin150_terms_2 <- lin150_terms[lin150_terms$topic < 31 & lin150_terms$topic > 15, ]
lin150_terms_3 <- lin150_terms[lin150_terms$topic < 46 & lin150_terms$topic > 30, ]
lin150_terms_4 <- lin150_terms[lin150_terms$topic < 61 & lin150_terms$topic > 45, ]
lin150_terms_5 <- lin150_terms[lin150_terms$topic < 76 & lin150_terms$topic > 60, ]
lin150_terms_6 <- lin150_terms[lin150_terms$topic < 91 & lin150_terms$topic > 75, ]
lin150_terms_7 <- lin150_terms[lin150_terms$topic < 106 & lin150_terms$topic > 90, ]
lin150_terms_8 <- lin150_terms[lin150_terms$topic < 121 & lin150_terms$topic > 105, ]
lin150_terms_9 <- lin150_terms[lin150_terms$topic < 136 & lin150_terms$topic > 120, ]
lin150_terms_10 <- lin150_terms[lin150_terms$topic < 151 & lin150_terms$topic > 135, ]

# Generate 10 PDFs with the updated splits

pdf("./figures/figureF1.pdf", width = 10, height = 10)
lin150_terms_1 %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()
dev.off()

pdf("./figures/figureF2.pdf", width = 10, height = 10)
lin150_terms_2 %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()
dev.off()

pdf("./figures/figureF3.pdf", width = 10, height = 10)
lin150_terms_3 %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()
dev.off()

pdf("./figures/figureF4.pdf", width = 10, height = 10)
lin150_terms_4 %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()
dev.off()

pdf("./figures/figureF5.pdf", width = 10, height = 10)
lin150_terms_5 %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()
dev.off()

pdf("./figures/figureF6.pdf", width = 10, height = 10)
lin150_terms_6 %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()
dev.off()

pdf("./figures/figureF7.pdf", width = 10, height = 10)
lin150_terms_7 %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()
dev.off()

pdf("./figures/figureF8.pdf", width = 10, height = 10)
lin150_terms_8 %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()
dev.off()

pdf("./figures/figureF9.pdf", width = 10, height = 10)
lin150_terms_9 %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()
dev.off()

pdf("./figures/figureF10.pdf", width = 10, height = 10)
lin150_terms_10 %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(beta, term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered()
dev.off()

#Figure E1, E2
#political topics and labels identified based on the coding rule explained in S4
pol_topics<-c(12, 13, 17, 18, 20, 23,28, 32, 33, 36, 42, 46,57, 61, 63, 65, 67
              ,69, 70, 71, 72, 73, 75, 78, 79, 82, 84, 85, 86, 95, 108, 111, 112, 114, 116
              ,118, 121, 123, 126, 131, 139,142, 146, 150) 

topic_labels<-c("Democracy and Communism ('dem', 'commi')","Illuminati and Trade ('theilluminati', 'trade')",
                "Kiev and Turkey ('turk', 'kiev')", "Trump and Gender ('trump', 'woman', 'gun')",
                "Black Christanity ('black', 'jesus')","Election and vote ('vote', 'elect', 'poll')",
                "Libertarianism ('libertarian')","Texas and Gas ('austin', 'tax', 'gas')",
                "Religion ('religion')", "Fake news and Freedom ('news', 'fake', 'freedom)",
                "Republican and Democrts ('parti', 'republican', 'democrat')", "Putin's invasion ('putin', 'invad)",
                "Ukraine war ('ukrain', 'russian', 'ukrainian', 'troop')",  "Gender ('women', 'men', 'tran')",
                "Gender and drug ('sex', 'drug', 'gender', 'legal')","Economy ('capitalist')",
                "Florida Police Violence ('polic', 'florida', 'violenc')",
                "Asian issue ('asian', 'ethnic')", "BLM protest ('protest', 'blm')",
                "Russian politics ('race', 'soviet','union')", "Leftist ('left', 'leftist')",
                "Propaganda ('propaganda')", "Patriot and Anarchist ('patriot', 'anarchist')",
                "Communism ('communism')", "Eurapean Climate ('german', 'irish','climat')",
                "Biden politics ('biden', 'stock', 'corrupt')", "Soviet ('ussr')",
                "Covid Vaccine Mandate ('covid', 'vaccin', 'mandat')", "Russian-Ukraine ('kyiv')",
                "AOC ('aoc')","President and Terrorist ('presid', 'speech', 'terrorist')",
                "China and Taiwan ('china', 'taiwan')","Socialist ('socialist')",
                "Feminism ('feminist')","Trans Right ('transgenderprid')",
                "International disputes ('russia', 'nato', 'democraci', 'afghanistan', 'iraq', 'india')", "Racism 1 ('racist', 'idiot')",
                "Racism 2 ('racism')", "Kyle Rittenhouse ('murder', 'kyle')",
                "Fascist ('fascist', 'weapon')","USA and Communism ('american', 'usa', 'communist', 'europe')", 
                "Environmental issue ('nuclear', 'plant')","Healthcare ('healthcare')",
                "US politics and Ukraine conflict ('berni', 'zelenski')")

#create topic proportion table
topic_proportion<-make.dt(LDA_agg_150)
top_prop_final<-summarise_all(topic_proportion, mean)
a<-as.numeric(top_prop_final)
a<-na.omit(a)
top_prop_all<-a[2:151]
top_prop_pol<-top_prop_all[c(12, 13, 17, 18, 20, 23,28, 32, 33, 36, 42, 46,57, 61, 63, 65, 67
                             ,69, 70, 71, 72, 73, 75, 78, 79, 82, 84, 85, 86, 95, 108, 111, 112, 114, 116
                             ,118, 121, 123, 126, 131, 139,142, 146, 150)]

#reordering based on the topic proportion
top_rank_dt<-as.data.frame(cbind(pol_topics, topic_labels,top_prop_pol))
top_rank_dt<-as_tibble(top_rank_dt)

top_rank_dt<-top_rank_dt%>% arrange(desc(top_prop_pol))

pdf("./figures/figureE1.pdf", width = 10, height = 15)
plot.STM(LDA_agg_150,type="summary", main="Ranking of 44 political topics (1)",xlim=c(0,0.025),custom.labels="",n=10,topics = as.numeric(top_rank_dt$pol_topics[1:22]),topic.names = top_rank_dt$topic_labels[1:22])
dev.off() 

pdf("./figures/figureE2.pdf", width = 10, height = 15)
plot.STM(LDA_agg_150,type="summary", main="Ranking of 44 political topics (2)",xlim=c(0,0.025),custom.labels="",n=10,topics = as.numeric(top_rank_dt$pol_topics[23:44]),topic.names = top_rank_dt$topic_labels[23:44])
dev.off() 

#Figure 9, Table 1
int<-c(5,8,9,17,24, 27 ,28,30, 35,41)
as.numeric(top_rank_dt$pol_topics[int])
top_rank_dt$topic_labels[int]
sum(as.numeric(top_rank_dt$top_prop_pol[int])) #0.06193285 (table1-row1)
sum(as.numeric(top_rank_dt$top_prop_pol[int]))/sum(as.numeric(top_rank_dt$top_prop_pol)) #0.2210711 (table1-row1)

pdf("./figures/figure9.pdf", width = 12, height = 5)
plot.STM(LDA_agg_150,type="summary", main="Ranking of topics on international issues",xlim=c(0,0.015),custom.labels="",n=10,topics = as.numeric(top_rank_dt$pol_topics[int]),topic.names = top_rank_dt$topic_labels[int])
dev.off()

#Figure 10, Table 1
usp<-c(4,6,11,19,20,29,32,37,38,43)
as.numeric(top_rank_dt$pol_topics[usp])
top_rank_dt$topic_labels[usp]
sum(as.numeric(top_rank_dt$top_prop_pol[usp])) #0.0614201 (table1-row2)
sum(as.numeric(top_rank_dt$top_prop_pol[usp]))/sum(as.numeric(top_rank_dt$top_prop_pol)) #0.2192408 (table1-row2)

pdf("./figures/figure10.pdf", width = 12, height = 5)
plot.STM(LDA_agg_150,type="summary", main="Ranking of topics on the US politics",xlim=c(0,0.015),custom.labels="",n=10,topics = as.numeric(top_rank_dt$pol_topics[usp]),topic.names = top_rank_dt$topic_labels[usp])
dev.off()

#Figure 11, Table 1
iden<-c(1, 13,15,18,21,25,26,36,40)
as.numeric(top_rank_dt$pol_topics[iden])
top_rank_dt$topic_labels[iden]
sum(as.numeric(top_rank_dt$top_prop_pol[iden])) #0.05998299 (table1-row3)
sum(as.numeric(top_rank_dt$top_prop_pol[iden]))/sum(as.numeric(top_rank_dt$top_prop_pol)) #0.214111 (table1-row3)

pdf("./figures/figure11.pdf", width = 12, height = 5)
plot.STM(LDA_agg_150,type="summary", main="Ranking of topics on identity politics",xlim=c(0,0.018),custom.labels="",n=10,topics = as.numeric(top_rank_dt$pol_topics[iden]),topic.names = top_rank_dt$topic_labels[iden])
dev.off()

#Figure 12, Table 1
ideo<-c(3, 7, 10, 22, 31, 42,44)
as.numeric(top_rank_dt$pol_topics[ideo])
top_rank_dt$topic_labels[ideo]
sum(as.numeric(top_rank_dt$top_prop_pol[ideo])) #0.04470375
sum(as.numeric(top_rank_dt$top_prop_pol[ideo]))/sum(as.numeric(top_rank_dt$top_prop_pol)) #0.1595714

pdf("./figures/figure12.pdf", width = 12, height = 5)
plot.STM(LDA_agg_150,type="summary", main="Ranking of topics on ideological debate",xlim=c(0,0.015),custom.labels="",n=10,topics = as.numeric(top_rank_dt$pol_topics[ideo]),topic.names = top_rank_dt$topic_labels[ideo])
dev.off()

#Figure G1
pog<-c(2 , 33, 34, 39)
as.numeric(top_rank_dt$pol_topics[pog])
top_rank_dt$topic_labels[pog]
sum(as.numeric(top_rank_dt$top_prop_pol[pog])) #0.02682962
sum(as.numeric(top_rank_dt$top_prop_pol[pog]))/sum(as.numeric(top_rank_dt$top_prop_pol)) #0.09576911

pdf("./figures/figureG1.pdf", width = 12, height = 5)
plot.STM(LDA_agg_150,type="summary", main="Ranking of topics on politics in general",xlim=c(0,0.015),custom.labels="",n=10,topics = as.numeric(top_rank_dt$pol_topics[pog]),topic.names = top_rank_dt$topic_labels[pog])
dev.off()

#Figure G2
php<-c(12, 14)
as.numeric(top_rank_dt$pol_topics[php])
top_rank_dt$topic_labels[php]
sum(as.numeric(top_rank_dt$top_prop_pol[php])) #0.01298952
sum(as.numeric(top_rank_dt$top_prop_pol[php]))/sum(as.numeric(top_rank_dt$top_prop_pol)) #0.04636645

pdf("./figures/figureG2.pdf", width = 12, height = 5)
plot.STM(LDA_agg_150,type="summary", main="Ranking of topics on public health and politics",xlim=c(0,0.015),custom.labels="",n=10,topics = as.numeric(top_rank_dt$pol_topics[php]),topic.names = top_rank_dt$topic_labels[php])
dev.off()

#Figure G3
ei<-c(16, 23)
as.numeric(top_rank_dt$pol_topics[ei])
top_rank_dt$topic_labels[ei]
sum(as.numeric(top_rank_dt$top_prop_pol[ei])) #0.01229017
sum(as.numeric(top_rank_dt$top_prop_pol[ei]))/sum(as.numeric(top_rank_dt$top_prop_pol)) #0.04636645

pdf("./figures/figureG3.pdf", width = 12, height = 5)
plot.STM(LDA_agg_150,type="summary", main="Ranking of topics on enviornmental issue",xlim=c(0,0.015),custom.labels="",n=10,topics = as.numeric(top_rank_dt$pol_topics[ei]),topic.names = top_rank_dt$topic_labels[ei])
dev.off()

#figure 13
#23 131 142
emo<-c(23,131,142)
lin150_terms_emo<-lin150_terms[lin150_terms$topic==emo[1], ]
for(i in 2:3){
  a<-lin150_terms[lin150_terms$topic==emo[i], ]
  lin150_terms_emo<-rbind(lin150_terms_emo,a)
}

a<-c(2,6,7,13,25,26,
     37,41)

lin150_terms_emo$pol<-0
for(i in 1:length(a)){
  lin150_terms_emo$pol[a[i]]<-1
}

lin150_terms_emo <- lin150_terms_emo %>%
  mutate(color = ifelse(pol == 1, "blue", "red"))

b<-c(11,27,35)
for(i in 1:3){
  lin150_terms_emo$color[b[i]]<-"green"
}

colnames(lin150_terms_emo) <- c('Topic','Keyword','Beta','Pol','Color')

lin150_terms_emo$Topic<-as.character(lin150_terms_emo$Topic)

lin150_terms_emo[which(lin150_terms_emo$Topic==23),]$Topic<-"Election and Potchamp"
lin150_terms_emo[which(lin150_terms_emo$Topic==131),]$Topic<-"Facist and Kkapitalist"
lin150_terms_emo[which(lin150_terms_emo$Topic==142),]$Topic<-"Environmental issues and Bboomer"

pdf("./figures/figure13.pdf", width = 15, height = 5)
lin150_terms_emo %>%
  mutate(term = reorder_within(Keyword, Beta, Topic)) %>%
  ggplot(aes(Beta, term, fill = Color)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ Topic, scales = "free") +
  scale_y_reordered()
dev.off() 

######################Figure 14 to 18 ###############################

#Load data for Figure 14 to 18
load('./data/fig14_to_18.rda')

#Figure 14. Ratio of isolates
iso_ratio<-vector(mode="numeric",length=length(network_sumstat))
for(i in 1:length(network_sumstat)){
  if(length(network_sumstat[[i]][[13]])>0){
    a<-network_sumstat[[i]][[3]][[1]]
    iso_ratio[[i]]<-length(which(a==0))/length(a)
  }
  else{
    iso_ratio[[i]]<-NA  
  }
}

plot_data <- data.frame(iso_ratio = iso_ratio)

pdf("./figures/figure14.pdf",width = 6, height = 6)
ggplot(plot_data,aes(x = iso_ratio)) + 
  geom_histogram(fill = "black", color = "white") + 
  labs(x="Ratio of isolates", y = "Count")+
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

#Figure 15. Distribution of mean node degrees.
mean_deg<-vector(mode="numeric",length=length(network_sumstat))

for(i in 1:length(network_sumstat)){
  if(length(network_sumstat[[i]][[13]])>0){
    mean_deg[[i]]<-mean(network_sumstat[[i]][[3]][[1]])
  }
  else{
    mean_deg[[i]]<-NA  
  }
}

plot_data<-as.data.frame(mean_deg)

pdf("./figures/figure15.pdf",width = 6, height = 6)
ggplot(plot_data,aes(x = as.numeric(mean_deg))) + 
  geom_histogram(fill = "black", color = "white") + 
  scale_x_log10()+
  labs(x="Mean degree", y = "Count")+
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

#Figure 16. Distribution of max node degrees
max_deg<-vector(mode="numeric",length=length(network_sumstat))

for(i in 1:length(network_sumstat)){
  if(length(network_sumstat[[i]][[13]])>0){
    max_deg[[i]]<-max(network_sumstat[[i]][[3]][[1]])
  }
  else{
    max_deg[[i]]<-NA  
  }
}

max_deg_in<-vector(mode="numeric",length=length(network_sumstat))

for(i in 1:length(network_sumstat)){
  if(length(network_sumstat[[i]][[13]])>0){
    max_deg_in[[i]]<-max(network_sumstat[[i]][[4]][[1]])
  }
  else{
    max_deg_in[[i]]<-NA  
  }
}

max_deg_out<-vector(mode="numeric",length=length(network_sumstat))

for(i in 1:length(network_sumstat)){
  if(length(network_sumstat[[i]][[13]])>0){
    max_deg_out[[i]]<-max(network_sumstat[[i]][[5]][[1]])
  }
  else{
    max_deg_out[[i]]<-NA  
  }
}

plot_data<-as.data.frame(cbind(plot_data,max_deg,max_deg_in,max_deg_out))

pdf("./figures/figure16_1.pdf",width = 6, height = 6)
ggplot(plot_data,aes(x = as.numeric(max_deg))) + 
  geom_histogram(fill = "black", color = "white") + 
  scale_x_log10()+
  labs(x="Max degree", y = "Count")+
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

pdf("./figures/figure16_2.pdf",width = 6, height = 6)
ggplot(plot_data,aes(x = as.numeric(max_deg_in))) + 
  geom_histogram(fill = "black", color = "white") + 
  scale_x_log10()+
  labs(x="Max in-degree", y = "Count")+
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

pdf("./figures/figure16_3.pdf",width = 6, height = 6)
ggplot(plot_data,aes(x = as.numeric(max_deg_out))) + 
  geom_histogram(fill = "black", color = "white") + 
  scale_x_log10()+
  labs(x="Max out-degree", y = "Count")+
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

#Figure 17. Distribution of reciprocity scores
recip<-vector(mode="numeric",length=length(network_sumstat))

for(i in 1:length(network_sumstat)){
  if(length(network_sumstat[[i]][[13]])>0){
    recip[[i]]<-network_sumstat[[i]][[6]]
  }
  else{
    recip[[i]]<-NA  
  }
}

plot_data<-as.data.frame(recip)

pdf("./figures/figure17.pdf",width = 6, height = 6)
ggplot(plot_data,aes(x = as.numeric(recip))) + 
  geom_histogram(fill = "black", color = "white", bins = 30) + 
  labs(x="Reciprocity", y = "Count")+
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()

#Figure 18. Power law or not
powerlawtf<-vector(mode="numeric",length=length(network_sumstat))

for(i in 1:length(network_sumstat)){
  if(length(network_sumstat[[i]][[13]])>0){
    if(network_sumstat[[i]][[12]][[1]]>0.05){
      powerlawtf[[i]]<-1
    }
    else{
      powerlawtf[[i]]<-0
    }
  }
  else{
    powerlawtf[[i]]<-NA  
  }
}

plot_data <- data.frame(powerlawtf = as.factor(powerlawtf[!is.na(powerlawtf)]))

pdf("./figures/figure18.pdf",width = 6, height = 6)
ggplot(plot_data, aes(x = na.omit(powerlawtf))) + 
  geom_bar(fill = "black", color = "white") + 
  scale_x_discrete(labels = c("No", "Yes")) +  # Renaming the factor levels
  labs(x = "", y = "Count") +
  theme_minimal() +
  theme(
    text = element_text(size = 14),       # General text size
    axis.title = element_text(size = 14), # Axis title font size
    axis.text = element_text(size = 14)   # Axis text font size
  )
dev.off()